\documentclass[]{article}

\usepackage{amsfonts}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{graphicx}
\usepackage{xfrac}
\usepackage{marvosym}
\usepackage{ltxcmds} % get commands: \ltx@empty, \ltx@carzero
\usepackage{enumerate}
%\usepackage[sort&compress]{natbib}

%\usepackage{hyperref}
\usepackage{amsfonts}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{textcomp}
\usepackage{amsthm}
\usepackage{bigstrut}
\usepackage[margin=1in]{geometry}
\usepackage{natbib}
\usepackage{graphicx}
\usepackage{listings}
\usepackage{color}
\usepackage[usenames,dvipsnames]{xcolor}
\usepackage{cancel}
\usepackage{chngcntr}
\usepackage{extarrows}
\usepackage{epstopdf}  
\usepackage{bigstrut} 
\usepackage{rotating} 
\usepackage{multirow} 
\usepackage{booktabs} 
\usepackage{authblk}


% ---------------------------------------------------------------------------
% Theorem-like environments (amsthm).
% All of them are declared under the "definition" style; without this call
% amsthm would fall back to its default "plain" style.
% ---------------------------------------------------------------------------
\theoremstyle{definition}

% Numbered environments; each one has its own, independent counter.
\newtheorem{thm}{Theorem}
\newtheorem{prop}{Proposition}
\newtheorem{cor}{Corollary}
\newtheorem{lem}{Lemma}
\newtheorem{conj}{Conjecture}
\newtheorem{exmp}{Example}
\newtheorem{assu}{Assumption}
\newtheorem{prope}{Property}
\newtheorem{note}{Note}
\newtheorem{remark}{Remark}
\newtheorem{obs}{Observation}

% Unnumbered environments.
\newtheorem*{notation}{Notation}
\newtheorem*{defn}{Definition}

% End-of-proof marker: a filled black square.
\renewcommand{\qedsymbol}{$\blacksquare$}
% ---------------------------------------------------------------------------
% Notation macros (all intended for math mode, except \noat).
% ---------------------------------------------------------------------------

% Overlined capital letters.
\newcommand{\Fbar}{\overline{F}}
\newcommand{\Gbar}{\overline{G}}
\newcommand{\Hbar}{\overline{H}}

% Expectation (blackboard E) and probability (blackboard P) symbols.
\newcommand{\Ex}{\mathbf{\mathbb{E\/}}}
\newcommand{\pr}{\mathbb{P}\/}

% \pEx{a}{b}: bold E followed by the pair (a,b) in \big parentheses.
\newcommand{\pEx}[2]{\mathbf{E\/}\big(#1,#2\big)}

% \diff{f}{x}: the operator d/dx followed by f.
\newcommand{\diff}[2]{\frac{d}{d #2}#1}

% Distribution notation: #1 is the subscript (the random variable),
% #2 the point at which the function is evaluated.
% \hrate -- hazard rate r_X(t)
\newcommand{\hrate}[2]{\mathbf{r}_{#1}(#2)}
% \surv -- survival function \overline{F}_X(t)
\newcommand{\surv}[2]{\mathbf{\overline{F}}_{#1}(#2)}
% \pdf -- probability density function f_X(t)
\newcommand{\pdf}[2]{\mathbf{{f}}_{#1}(#2)}
% \MG -- bold "MG" with subscript #1 and argument #2
\newcommand{\MG}[2]{\mathbf{{MG}}_{#1}(#2)}
% NOTE(review): \cdf typesets the same overlined F as \surv (without an
% argument); confirm whether a plain, non-overlined F was intended.
\newcommand{\cdf}[1]{\mathbf{\overline{F}}_{#1}}

% \ve{x}: bold symbol.
\newcommand{\ve}[1]{\mathbf{#1}}

% \noat{text}: author note, typeset in red.
\newcommand{\noat}[1]{\textcolor{red}{ #1}}

% Disabled definitions, kept for reference.
%\newcommand{\hr}[2] {
%\dfrac{#1(#2)}{\overline{\MakeUppercase#1}(#2)}}
%\newcommand{\hrt}[1] {
%\dfrac{#1(t)}{\overline{\MakeUppercase#1}(t)}}
%\newcommand{\Pr}{\mathbb{P\/}}
%\counterwithin{cor}{thm}
%\counterwithin{cor}{lem}


% Restated ("rep") theorems.
% \newreptheorem{<env>}{<Title>} defines an environment named rep<env> that
% takes one argument, a label, and typesets an unnumbered theorem whose
% heading is "<Title> \ref{<label>}".
% \makeatletter / \makeatother allow "@" in the internal macro names below.
\makeatletter
% Unnumbered theorem whose heading is whatever \rep@title currently expands to.
\newtheorem*{rep@theorem}{\rep@title}
\newcommand{\newreptheorem}[2]{%
\newenvironment{rep#1}[1]{%
 \def\rep@title{#2 \ref{##1}}%
 \begin{rep@theorem}}%
 {\end{rep@theorem}}}
\makeatother


% Numbered "theorem" and "corollary" environments.
% NOTE(review): the preamble also declares thm and cor with the same printed
% names ("Theorem", "Corollary"); each declaration has its own counter.
\newtheorem{theorem}{Theorem}
\newtheorem{corollary}{Corollary}
% Restated counterparts: environments reptheorem and repcorollary, each taking
% the label of the result being restated.
\newreptheorem{theorem}{Theorem}
\newreptheorem{corollary}{Corollary}












\begin{document}
\bibliographystyle{abbrv}
\section{Modeling Framework}
We consider an ESRD patient receiving HD with at least one unused AVF opportunity. We assume that the patient does not consider other treatment options such as kidney transplantation, and thus will depend on HD until death. Also, we assume that the patient has to choose between two vascular access types, CVC, and AVF, i.e. we do not consider AVGs here. We will discuss these limitations further in Section \ref{sec:dis}. There is a considerable time lag from the AVF referral until the time it is mature enough for HD. 
Since a CVC can be placed and used right away (\cite{KurellaOpt}), during this lead time, the patient will receive HD via a CVC.

In Figure \ref{fig:decision}, the decision making framework is illustrated. As the decision flowchart suggests, we have made the following assumption:
\begin{assu} [Decision points] \label{ass:dec}
 A patient cannot start the AVF creation process while an AVF is under preparation or when it is being used.
\end{assu}
 Although it might be optimal to create a new AVF when the one being used is approaching the end of its lifetime, this is not clinically realistic and thus we did not consider it here.
 \noat{\\I have removed the sentence `in other words, the only time AVF creation is under consideration is when a patient has just started HD with a CVC, or when a functional AVF has just failed.', because consider a scenario in which AVF has failed a year ago; still we can consider AVF creation....}

\begin{figure}[htbp]
\centering
\includegraphics[scale=.8]{./files/decision.pdf}
\caption{Vascular access choice for HD}
\label{fig:decision}
\end{figure}
The dynamics and principles of the model can be summarized as follows:
\begin{enumerate}
\item Patients receive HD via an AVF as long as they have a functional AVF.
\item When there is no functional AVF (either because one has failed or because the patient starts HD without an AVF), the patient receives HD via a CVC. During this time, the policy in use determines \textit{\textbf{whether}} and \textit{\textbf{when}} to do the AVF surgery.
\item If the policy in use recommends an AVF surgery, the patient goes through the AVF creation process, and waits until a functional AVF becomes available.
\item The patient switches back and forth between CVC and AVF until there are no more AVF opportunities, the patient dies, or the policy in use does not suggest any more AVF surgeries.
\end{enumerate}
We discuss factors impacting the decision of whether and when to use AVF opportunities in the following sections.
\subsection{Access-Based Patient's Survival on HD}
A patient's survival on HD depends on the vascular access being used \cite{perl_hemodialysis_2011, KurellaSurv}. Figure \ref{fig:survival} (left) (reproduced from \cite{perl_hemodialysis_2011}) shows that patients receiving HD via an AVF experience better survival than those who receive it via a CVC. Nevertheless, the survival benefit of AVF over CVC, measured by the mortality rate difference, diminishes as the patient continues using HD (Figure \ref{fig:survival} (right)). In addition, the on-HD mortality rate of the patient for either access type increases as the on-HD duration increases.

\begin{figure}[htbp]
\centering
\includegraphics[scale=.6]{./files/survival.pdf}
\caption{On-HD survival for a 67 year old patient, reproduced from \cite{perl_hemodialysis_2011}}
\label{fig:survival}
\end{figure}
Based on these observations, we make some assumptions with respect to a patient's survival that generalize observed trends in these plots.
 
In what follows, we let $t=0$ represent the time at which a patient first begins HD (denoted by $t_0$), and then $t$ represents the time the patient has been on HD.  Also, by residual lifetime at time $t$, we mean remaining lifetime from time $t$ onward conditional on survival until time $t$. We begin with some notation and definitions:

\begin{itemize}
\item $\surv{X}{t}$: survival probability of any random variable $X$ until time $t$ 
\item $\pdf{X}{t}$: probability density function of any random variable $X$ at time $t$
\item $\hrate{X}{t}$: hazard rate of any random variable $X$ at time $t$ 
\item $X_t$: residual lifetime of any random variable $X$ at time $t$
\item $\mu(t) \in \{\text{A, C}\}$: patient's HD modality at time $t$ ($A$ if it is an AVF, and $C$, if it is a CVC).
\item $C$: random variable denoting patient's lifetime if they were to remain on a CVC from HD initiation time until death.
\item $A$: random variable denoting patient's lifetime if they were to remain on an AVF from HD initiation time until death.
\item $L_t$: random variable denoting patient's residual lifetime at time $t$.
\end{itemize}
Note that the distributions of $C$ and $A$ are dependent on a patient's age at the time HD commences, but we do not denote this dependency for ease of notation. Also, we use hazard rate, failure rate, and mortality rate interchangeably.

The following are the definitions of two common types of stochastic order for random variables.
\begin{defn} [Usual stochastic order]
We say $X \le_{st} Y$, if and only if
\begin{align*} %\label{eq:stdef}
\surv{X}{t} \le \surv{Y}{t}: \forall t.
\end{align*}
\end{defn}
\begin{defn} [Hazard rate order]
We say $X \le_{hr} Y$, if and only if
\begin{align}\label{eq:hrdef}
\hrate{Y}{t} \le \hrate{X}{t}: \forall t.
\end{align}
\end{defn}

 Our first assumption describes how survival behaves when a patient switches from one vascular access to the other, and how the history of HD (e.g. vascular access types used) impacts the patient's future survival.
\begin{assu} [Survival distribution] \label{ass:surv}
A patient's survival depends on the length of duration the patient has been on HD and the current mode of HD access (an AVF or a CVC), and is independent of the history of HD access type. Mathematically, $\forall  a \ge 0$ such that $\mu(t+y)=\mu(t): \forall y: 0 \le y \le a$ (i.e., the patient remains on the same access from $t$ until time $t + a$ ), we have:
\begin{align*}
& \pr \left(L_t \ge a \big| \mu(t') :\forall t' \le t\right)=\pr \left(L_t \ge a \big| \mu(t)\right), \\
&\pr \left ( L_t \ge a \big| \mu(t)=A \right) =\surv{A_t}{a}, \\
&\pr  \left (L_t \ge a \big| \mu(t)=C\right )=\surv{C_t}{a}.
\end{align*}
\end{assu}
For technical purposes, we also assume that $\surv{A}{a}$, and $\surv{C}{a}$ are differentiable for all values of $a$. Note that this implies differentiability for  $\surv{A_t}{a}$, and $\surv{C_t}{a}$, as well.

The following two assumptions describe the relative performance of HD via a CVC versus an AVF over time. They formalize the notion that survival on an AVF is better than survival on a CVC, but that the benefit of an AVF over a CVC (measured by mortality rate difference) decreases with time.
\begin{assu} [Relative performance]  \label{ass:relative}
The residual lifetime of $C$ is stochastically smaller than the residual lifetime of $A$, at all ages. Mathematically, we have:
\begin{align*} 
 C_t \le_{st} A_t , \forall t
\end{align*}
Note that according to Lemma \ref{lem:hr_eq}, Assumption \ref{ass:relative} is equivalent to $ C \le_{hr} A$.
\end{assu}
\begin{assu} [Diminishing difference] \label{ass:converging}
The difference of hazard rates of $C$ and $A$ is decreasing in time, i.e. 
\begin{align*} %\label{eq:conv}
\hrate{C}{t} - \hrate{A}{t} \downarrow t.
\end{align*}
\end{assu}

And finally, the following assumption states that the performance of HD on either access types, measured by the mortality rate, diminishes as the patient ages.
\begin{assu} [Diminishing performance] \label{ass:IFR}
The random variables $A$ and $C$ have the increasing failure rate (IFR) property, i.e. $\hrate{A}{t}$ and $\hrate{C}{t}$ are increasing in $t$.
\end{assu}


\subsection{AVF Creation Process}
After a patient decides to use an AVF for HD, s/he visits a vascular surgeon for AVF placement, and waits for an AVF surgery time. After the surgery is performed, the AVF maturation, a process by which a fistula becomes suitable to use for HD (e.g. develops adequate flow, wall thickness, and diameter), begins. The time from AVF creation until either an AVF usable for HD is achieved (with interventions if necessary) or the AVF is abandoned due to failure is about 3 months (\cite{rayner,Ethier}). Around 60\% of placed AVFs fail to mature (\cite{Hakim,Dember, Dember2,Xue}). Even if AVF creation is successful, the AVF has a limited lifetime (\cite{Radoui, Roy}). We use the following notation for random variables describing the AVF creation process:
\begin{itemize}
\item $W_i$: r.v. denoting wait time from AVF referral until AVF surgery of the $i$th AVF creation
\item $M_i$: r.v. denoting the maturation time of the $i$th AVF
\item $K_i$: r.v. denoting the total lifetime of the $i$th AVF  (if AVF creation is unsuccessful, then $K_i=0$)
\end{itemize}
We make the following assumption about the AVF creation process:
\begin{assu} [AVF's maturation and lifetime] \label{ass:AVFs}
The respective random variables describing the creation process, i.e., $W_i, K_i, M_i$, for subsequent AVFs are independent and identically distributed. Furthermore, their distributions are stationary (age independent), and are independent of the survival process.
\end{assu}
\noat{We need to justify these somehow?}
\subsection{Objective Functions}
\subsubsection{Total Lifetime}
A natural metric for comparing policies is the total lifetime of a patient. Thus, we consider maximizing a patient's total lifetime as one of the objective functions. 

\subsubsection{Quality Adjusted Life Expectancy} \label{sec:QALEDef}
Using AVF for HD not only brings better survival, but also has a slightly higher quality of life for the patient, in comparison with HD using a CVC (\cite{Goro}). Nevertheless, the process of AVF creation has some disutility associated with it, which can be attributed to the surgery and post-surgery inconveniences, complications or costs. We define a patient's quality adjusted life expectancy (QALE) as the quality adjusted lifetime on each vascular access minus the AVF surgery disutility for each AVF surgery performed (whether successful or failure). The following parameters are used in defining the patient's QALE:
\begin{itemize}
\item $L_A^{\pi}$, $L_C^{\pi}$: random variables denoting patient's aggregate lifetime on AVF and CVC, respectively, from HD initiation until death, under an AVF referral policy $\pi$
\item $q_a$, $q_c$: age independent quality of life coefficient of HD using AVF and CVC, respectively
\item $d$: age independent AVF creation disutility
\item $Q^{\pi}$: random variable denoting the patient's total QALE, under an AVF referral policy $\pi$
\item $N^{\pi}$: random variable denoting number of AVF surgeries performed, under an AVF referral policy $\pi$
\end{itemize}
Based on estimates in the literature, we assume $q_a \ge q_c$ (\cite{Goro}). Using this notation, we have the following equation for a patient's total QALE under an arbitrary AVF referral policy $\pi$:
\begin{align*}
Q^{\pi}=q_a L_A^{\pi}+q_c L_C^{\pi}-dN^{\pi}
\end{align*}
AVF creation disutility depends on the patient's physical and mental characteristics, and is thus hard to estimate. We will discuss how to circumvent this difficulty in Section \ref{sec:dis}.
\section{Optimal Policies}
\subsection{Total Lifetime} \label{sec:optTL}
Since the survival benefit of an AVF over a CVC diminishes as the patient ages (according to Assumption \ref{ass:converging}), one may think an HD patient should be referred for AVF as soon as an opportunity becomes available, rather than keeping the opportunity for later years.  We prove this below in a stochastic ordering sense: an identical patient referred for AVF earlier than another patient lives stochastically longer than that patient. Of course, this also means that the first patient has a longer expected lifetime.

\begin{thm} \label{thm:total}
Under Assumptions \ref{ass:dec}-\ref{ass:converging}, and \ref{ass:AVFs}, delaying AVF referral stochastically decreases a patient's lifetime.
\end{thm}
Note that Assumption \ref{ass:IFR} is not required for this result.
\begin{cor}\label{cor:optTotal}
The optimal policy to maximize patient's survival probability after $x$ year(s) for all $x\ge 0$ (and as a result to maximize expected lifetime) is to refer a patient on CVC for an AVF as soon as possible, provided AVF opportunities remain and no AVF is already in the process of maturing.
\end{cor}
Note that the optimal policy in this case is independent of patient's characteristics.
\subsection{QALE -- A Generic Patient} \label{sec:QALEGP}
Since the survival benefit of AVF vanishes as the patient ages, one would want to avoid the AVF creation disutility if it cannot be compensated by the better survival and quality of life associated with HD on an AVF. We will show that the optimal referral policy to maximize patient's expected QALE is of threshold type: if the patient's age at the time of decision is less than an age threshold, i.e. $t<\tau(t_0,d)$, then it is optimal to refer patient for AVF creation at the time of decision; otherwise, the optimal policy is to use CVC for the rest of patient's life.  We will prove that the critical age is independent of number of AVF chances remaining. We discuss the impact of other model components on the optimal threshold in section \ref{sec:psqale}.

\noat{ I don't know how to put this in a succint way. What I have above is wrong 1- the decision is whether to `do the surgery or not" and not `to refer or not" 2- it is on-HD duration threshold not age threshold, although there is a correspondance between them.}

\noindent To explain the dynamics of the model and prove the results, we set up a dynamic programming model using the following notation.
\begin{itemize}
\item $\pi(\tau)$: the threshold policy with parameter $\tau$ 
\item $\pi_0$: the policy of using CVC for the rest of the patient's life (thereafter referred to as the no-referral policy). Note that $\pi_0 $ is equivalent to $\pi(\tau)$ for $\tau=0$.
\item $v^\pi(t,n)$: the value function (the residual QALE of a patient) at decision state $(t,n)$ following policy $\pi$
\item $v(t,n)$: the optimal value function at decision state $(t,n)$.
\item $@_y$: the decision to refer the patient for AVF creation at time $y$ (with respect to the time of decision)
\item $v(t,n,y)$: the value function of the policy consisting of action $@_y$ for the current AVF chance, and then the optimal policy for the subsequent decisions
\item $L(t,n,y)$: random variable denoting patient's residual lifetime at time $t$ under the policy mentioned above
\end{itemize}

The elements of the dynamic programming model are as follows:
\begin{itemize}
\item \textbf{States}: The set $\{(t,n), \forall t,n\ge 0 \} \cup \{\Delta\}$ defines the state space. The set of vectors $(t,n)$ consisting of $t$, the time, and $n$ the number of AVF chances left, corresponds to a living state, and the absorbing state $\Delta$ corresponds to the death state.
\item \textbf{Actions}: In each decision point (see Assumption \ref{ass:dec}), $(t,n \ge 1) $, one of two actions can be taken: either to refer patient for AVF creation at time $y$, denoted by $@_y$, or no more AVF referrals (the no-referral action). Note that the  no-referral action is the case of referral at $y=\infty$. Nevertheless, we keep it in the action space for more clarity. When $n=0$, the only option is to remain on CVC for the remainder of the patient's lifetime.  
\item \textbf{Transition probability}: We may transition to the state $\Delta$ or to $(t',n-1)$ for some $t' \ge t$. More specifically, we transition to the state $(t'=t+y+M_n+K_n,n-1)$ if $L(t,n,y) \ge y+M_n+K_n$, and to the state $\Delta$, otherwise. Note that the residual lifetime, $L(t,n,y)$, is dependent on $M_n,K_n$ as well.
\begin{figure}[htbp]
\centering
\includegraphics[scale=0.6]{./files/dp.pdf}
\caption{Dynamic programming diagram}
\label{fig:dp}
\end{figure}
\item \textbf{Immediate reward}: The immediate reward consists of patient's total QALE from time $t$ to the next state. Define the reward function $r(y,m,k,l)$ as follows, in which $m,k,l$ are instances of random variables $M,K, L(t,n,y)$ respectively (See Figure \ref{fig:dp}):
\begin{align*}
r(y,m,k,l)=\begin{cases}
q_cl& l \le y\\
-d+q_c l& y \le l \le y+m\\
-d+q_c(y+m)+q_a\big(l-(y+m)\big) & y+m \le l \le y+m+k\\
-d+q_c(y+m)+q_ak & y+m+k \le l
\end{cases}
\end{align*}
Then the immediate reward of action $@_y$ is defined as $R(t,@_y):=\Ex r\big (y,M,K,L(t,n,y)\big )$.
\item \textbf{Optimality equation}: The Bellman optimality equation is as follows:
\begin{align*}
v(t,n)= \max \{ \sup_y v(t,n,y), v^{\pi_0}(t,n) \}
\end{align*}
in which 
\begin{align*}
& v(t,0)=v^{\pi_0}(t,n)=R(t,@_\infty)=q_c\Ex C_t,\\
& v(t,n,y)= R(t,@_y)+\pr[L(t,n,y) \ge t'(y)-t].\Ex_{t'(y) \big | L(t,n,y) \ge t'(y)-t} \big[ v(t',n-1) \big].
\end{align*}
\end{itemize}

The following theorem proves the optimality of threshold policies for the case we have one AVF opportunity ($n=1$), and also establishes a foundation for finding the optimal threshold.
\begin{thm}[Existence of a referral threshold for $n=1$] \label{thm:qalen=1}
Assume $n=1$. There is an age threshold $\tau^*$ such that the policy $\pi(\tau^*)$ maximizes the expected QALE of the patient. In other words, for $t < \tau^*$, referral at $t$ is the optimal action, otherwise the no-referral action is  optimal.
\end{thm}
Note that for simplicity of notation, we don't show the dependency of the threshold on the age at HD initiation and other model parameters. 
\begin{cor}[Binary search]\label{cor:binsearch} The optimal policy can be found using a binary search for $\tau^*$ over $[0,\infty)$.
\end{cor}
In the next theorem, we see that the optimal threshold found in Theorem \ref{thm:qalen=1} for $n=1$ is also optimal for $n > 1$. Therefore, it establishes the fact that the decision of whether and when to refer for AVF creation is independent of the number of AVF chances remaining.
\begin{thm} [Optimality of threshold policies] \label{thm:QALE}
The policy $\pi(\tau^*)$ (constructed in Theorem \ref{thm:qalen=1}) is optimal for all $n \ge 1$.
\end{thm}


\subsection{Patient-Specific Considerations}\label{sec:psqale}
In this section, we discuss how the optimal policy can be tailored for each individual based on their personal preferences and/or their physical characteristics. Since we proved that the optimal policy is of threshold type for a generic patient, we compare the optimal threshold for patients with comparable characteristics.

First, we introduce a dual view of the age-threshold policy. More specifically, we show that at any time, the decision of whether to do an AVF surgery or not is determined by comparing the patient's AVF creation disutility with a critical value.
\noat{we can revamp all the results by focusing on the critical disutility than the critical age. What do you think?}
\begin{thm} [Critical disutility] \label{thm:cdis} At any age, there exists a critical AVF creation disutility denoted by $d^{\text{cr}}(t)$, such that the optimal decision at time $t$ is to do the AVF surgery immediately if the patient's AVF creation disutility is not higher than the critical disutility (i.e. if $d \le d^{\text{cr}}(t)$), and is to use a CVC for the rest of the patient's life, otherwise.
\end{thm}
Based on this theorem, we have the following immediate result:
\begin{cor}\label{cor:dec_d}
The critical age, $\tau^*(d)$, is decreasing in $d$.
\end{cor}
We also have the following intuitive result for $d^{\text{cr}}(t)$:
\begin{cor}\label{cor:dcrt}
Critical disutility, $d^{\text{cr}}(t)$, is decreasing in $t$. Furthermore, if any of Assumptions \ref{ass:relative}-\ref{ass:IFR} hold strictly, we have that $d^{\text{cr}}(t)$ is strictly decreasing in $t$.
\end{cor}


Theorem \ref{thm:cdis} gives an alternative way of comparing the optimal policy for individual patients as follows: if the critical disutility for one patient is always smaller than that of another, then the first patient has a smaller age-threshold, given that both patients have the same AVF creation disutility. Recall that a patient's characteristics can be summarized by the quality of life factors ($q_a, q_c, d$), the distributions of the random variables defining the patient's survival ($A$, $C$), and the random variables describing the AVF creation process and lifetime ($K$, $M$, and $W$). Thus, we can compare the critical disutility in terms of these variables.
\begin{thm}\label{thm:compdcrt}
Consider two patient types indexed by 1 and 2 whose characteristics satisfy the following:
\begin{enumerate}
\item $M^{(2)} \le_{st} M^{(1)}$
\item $K^{(1)} \le_{st} K^{(2)}$
\item $A^{(1)} \le_{hr} A^{(2)}$, and $C^{(1)} \le_{hr} C^{(2)}$
\item $[\hrate{C^{(1)}}{t}- \hrate{A^{(1)}}{t}] \le [\hrate{C^{(2)}}{t} -\hrate{A^{(2)}}{t}]: \forall t$
\item $q_c^{(1)} \le q_c^{(2)}$, and $q_a^{(1)} - q_c^{(1)} \le q_a^{(2) }- q_c^{(2)}$
\end{enumerate}
where $(i)$ denotes the index. Then, $d^{\text{cr}}_{(1)}(t) \le d^{\text{cr}}_{(2)}(t): \forall t$.
\end{thm}
We conclude this section by discussing a special case of this problem.
\subsection{A Special Case}
Assume that a patient's lifetime on HD has exponential distribution, i.e. $A \sim \exp(a)$, and $C \sim \exp(c)$. Note that $\hrate{A}{t}=a$ and $\hrate{C}{t}=c$, and therefore Assumptions \ref{ass:relative}-\ref{ass:IFR} are satisfied, if and only if $a \le c$. Therefore, we assume $0<a \le c$ for the rest of this section.

The optimal policy for the total lifetime metric is independent of the distributions of $A$ and $C$. Therefore, we only discuss the optimal policy for the QALE metric. As Theorem \ref{thm:QALE} suggests, there is an age threshold at which the optimal policy switches from immediate AVF surgery to using a CVC forever. But since the exponential distribution has the memoryless property and lacks any notion of aging, the optimal policy must be the same at all ages. We prove that implicitly as follows:

\begin{thm}\label{thm:exp}
Assume that $A \sim \exp(a)$, and $C \sim \exp(c)$, where $0<a \le c$. Then,
\begin{align}\label{eq:components}
d^{\text{cr}}(t) =d^{\text{cr}}:= p\bigg[\frac{q_a}{a}-\frac{q_c}{c} \bigg] \bigg[\Ex_M\big[ e^{-cM}\big]\bigg ] \bigg[1-\Ex_Z\big[ e^{-aZ}\big] \bigg],
\end{align}
where $p$ is the success probability of the AVF creation process, and $Z$ is the lifetime of a functional AVF. Furthermore, if $M\sim \exp(m)$, and $K\sim \exp(k)$, we have:
\begin{align*}
d^{\text{cr}} = p .\bigg[\frac{q_a}{a}-\frac{q_c}{c}\bigg] .  \frac{1}{1+ \frac{c}{m}} . \frac{1}{1+ \frac{k}{a}} 
\end{align*}
\end{thm}
Since $d^{\text{cr}}$ is a constant term (with respect to $t$), the optimal decision is to always (i.e. at all ages) refer immediately, if $d \le d^{\text{cr}}$, and never refer at all, otherwise.

Equation \eqref{eq:components} also demonstrates how different components of the model interact. The critical disutility is the product of four parts:
\begin{itemize}
\item $p$: this represents the impact of success probability of AVF creation
\item $\dfrac{q_a}{a}-\dfrac{q_c}{c}$: this represents the ideal QALE benefit of an AVF over a CVC, which we could achieve if a) AVF could mature right away, b) the surgery would always be successful, and c) AVF lived forever.
\item $0 \le \Ex_M[ e^{-cM}] \le 1$: this factor represents the delay impact of the maturation time.
\item $0 \le 1-\Ex_Z [ e^{-aZ}] \le 1$: this factor represents the impact of (limited) lifetime of an AVF.
\end{itemize}
Note that this result is in agreement with results of Theorem \ref{thm:compdcrt}.

Note that for this case $d^{\text{cr}}(t)$ can be easily calculated (recall that $\Ex_X[e^{tX}]$ is the moment generating function of a random variable $X$ evaluated at $t$).


\newpage
\section{Appendix I: Supplementary Results}
\begin{lem}\label{lem:hr_eq}
The following are equivalent to $X \le_{hr} Y$:
\begin{enumerate}[(a)]
\item $X_t \le_{st} Y_t, \forall t.$
\item  $\dfrac{\surv{X}{t}}{\surv{Y}{t}} \downarrow t.$
\end{enumerate}

\end{lem}
\begin{proof}
~\\
For (a) see Theorem 1.B.7 in \cite{shaked2007stochastic}. For (b), see Theorem 1.3.3 in \cite{muller}.
\end{proof}
\begin{lem}[Closure of stochastic order under mixture]\label{lem:pres}
Let $X$, $Y$, $Z$ be random variables such that for all values of $z$, we have $[X | Z=z] \le_{st} [Y|Z=z]$. Then, $X \le_{st} Y$.
\end{lem}

\begin{proof}
See Theorem 1.2.15 in \cite{muller}.
\end{proof}

\begin{lem}\label{lem:ass2_a} Assumption \ref{ass:converging} is equivalent to having that $\dfrac{\surv{C}{t}}{\surv{A}{t}}$ is a log-convex function of $t$.
\end{lem}
\begin{proof}
Note that $\diff{\ln \surv{X}{t}}{t}=-\hrate{X}{t}$. Since $\diff{\ln \dfrac{\surv{C}{t}}{\surv{A}{t}}}{t}=\diff{\ln \surv{C}{t}}{t}-\diff{\ln \surv{A}{t}}{t}=\hrate{A}{t}-\hrate{C}{t}$, the result follows from Assumption \ref{ass:converging} and the fact that a differentiable function is convex if and only if its derivative is increasing.
\end{proof}
\begin{lem}\label{lem:log-conv}
Assume that function $f$ is differentiable and log-convex. Then $\frac{f(x)}{f(x+a)}$ is decreasing in $x$ for any $a \ge 0$.
\end{lem}
\begin{proof}
It suffices to show that $\ln\frac{f(x)}{f(x+a)}=\ln f(x)-\ln f(x+a)$ is decreasing in $x$. Define $F:=\ln f$, a convex function by assumption. Since $\diff{\ln\frac{f(x)}{f(x+a)}}{x}=\diff{F(x)}{x}-\diff{F(x+a)}{x} \le 0$, based on the fact that the derivative of a convex function is increasing, we have that $\ln\frac{f(x)}{f(x+a)}$ is decreasing in $x$.
\end{proof}
\begin{lem}\label{lem:IFR}
The random variable $X$ has the IFR property if and only if $X_t$ is stochastically decreasing in $t$.
\end{lem}
\begin{proof}
~\\
$\rightarrow$ Choose $t \le t'$ arbitrarily. Fix $s \ge 0$. We have $\hrate{X_{t'}}{s}=\hrate{X}{t'+s}$, and $\hrate{X_t}{s}=\hrate{X}{t+s}$. Thus, by the IFR property, we have $\forall s, \hrate{X_t}{s} \le \hrate{X_{t'}}{s}$. Thus, $X_{t'} \le_{hr} X_t$, which implies $X_{t'} \le_{st} X_t$ by Lemma \ref{lem:hr_eq}.\\
$\leftarrow$ Choose $t \le t'$ arbitrarily. For all $s \ge 0$, we have $X_{t'+s} \le_{st} X_{t+s}$, thus by Lemma \ref{lem:hr_eq}, $X_{t'} \le_{hr} X_t$, and thus the result. 

\end{proof}
\begin{lem}\label{lem:res'} The mean residual lifetime of a random variable $X$ is differentiable, if $\surv{X}{t}$ is differentiable. Moreover, we have
\[\diff{\Ex X_t}{t}=\hrate{X}{t}\Ex X_t-1.\]
\end{lem}
\begin{proof}
See \cite{gupta2003representing} for a proof.
\end{proof}

\newpage
\section{Appendix II: Results}
\subsection{Total Lifetime Results}
In this section, we show the proofs of the results in Section \ref{sec:optTL}.

The following lemma facilitates inferring results about the residual lifetime, and residual QALE by showing that at any time, the residual lifetime of random variables $A$, and $C$ also have the properties stated in Assumptions \ref{ass:relative}- \ref{ass:IFR}.
\begin{lem} \label{lem:assgen}
Assumptions \ref{ass:relative}-\ref{ass:IFR} apply to $A_t$, and $C_t$ as well. In other words, we have
$\forall t \ge 0$:
\begin{align*}
C_t \le_{hr} A_t,\\
\hrate{C_t}{s}-\hrate{A_t}{s} \downarrow s.
\end{align*}
\end{lem}

\begin{proof}
The result directly follows from the fact that $\hrate{X_t}{s}=\hrate{X}{t+s}$ for any random variable $X$, and $t,s\ge 0$
\end{proof}

Suppose one could set the AVF use time (rather than referral time) at $t=u$. We prove that the residual lifetime decreases stochastically in $u$. In Theorem \ref{thm:total}, we will show that this result extends to the case of AVF referral time. Before that, we provide further notation that will be used in what follows.
\begin{itemize}
\item $u$: time to use an AVF
\item $L(t,n)$: patient's residual lifetime at time $t$, given $n$ remaining AVF chances, under the optimal policy (one that maximizes patient's survival function, point-wise; if such policy exists)
\item $L(t,n,u)$: patient's residual lifetime at time $t$, when we use the first AVF chance at $t+u$, and follow the optimal policy for the subsequent $n-1$ AVF chances
\end{itemize}

\begin{prop}\label{prop:total}
Under Assumptions \ref{ass:dec}-\ref{ass:AVFs}, $L(t,n,u_2) \le_{st} L(t,n,u_1)$, whenever $u_1 \le u_2$.
\end{prop}

\begin{proof}
~\\
Let $L(u):=\big[L(t,n,u) \big |K_n=k\big]$. By closure of stochastic order under mixture (Lemma \ref{lem:pres}), it suffices to prove that for all $k$, $\surv{L(u)}{a}$ is decreasing in $u$ (for all $a$). We prove this by induction on $n$.\\

\noindent $\rightarrow$ Base case: $n=1$: \\
Depending on the values of $u, a, k$ we can calculate $\surv{L(u)}{a}$ as follows. (See Figure \ref{fig:fig1}):
\begin{figure}[htbp]
\centering
\includegraphics[scale=0.7]{./files/LT-F1.pdf}
\caption{Possible cases for $\surv{L(u)}{a}$}
\label{fig:fig1}
\end{figure}
\begin{itemize}
\item Case 1: $a \le u$: We have $\surv{L(u)}{a} \xlongequal{A(\ref{ass:surv})} \pr [C_t>a]=\surv{C_t}{a}$.
\item Case 2: $[a-k]^+ \le u \le a$: We have
\begin{align*}
\surv{L(u)}{a}&=\pr [C_t>u , A_{t+u}>a-u  ]=\pr [C_t>u]\pr[A_{t+u}>a-u \big |C_t>u]\\
& \xlongequal{A(\ref{ass:surv})} \pr [C_t>u]\pr[A_{t+u}>a-u] \xlongequal{A(\ref{ass:surv})}\pr [C_t>u].\pr[A_t>a|A_t>u]=\surv{C_t}{u}\frac{\surv{A_t}{a}}{\surv{A_t}{u}}
\end{align*}
\item Case 3: $0 \le u \le [a-k]^+$: We have:
\begin{align*}
\surv{L(u)}{a}&=\pr [C_t>u , A_{t+u}>k  , C_{t+u+k}>a-(u+k) ]\\
&=\pr [C_t>u].\pr[A_{u+t}>k \big |C_t>u].\pr[C_{t+u+k}>a-(u+k) \big |A_{t+u}>k , C_t>u]\\
&\xlongequal{A(\ref{ass:surv})} \pr [C_t>u].\pr[A_{t}>k+u \big |A_t>u].\pr[C_{t}>a \big |C_{t}>u+k]\\
& \xlongequal{A(\ref{ass:surv})} \surv{C_t}{u}.\frac{\surv{A_t}{k+u}}{\surv{A_t}{u}}\frac{\surv{C_t}{a}}{\surv{C_t}{u+k}}=
\surv{C_t}{a}.\frac{\surv{C_t}{u}}{\surv{A_t}{u}}/\frac{\surv{C_t}{u+k}}{\surv{A_t}{u+k}}
\end{align*}
\end{itemize}
in which $A(n)$ represents implication of Assumption $n$.
Note that $\surv{L(u)}{a}$ is continuous within each range, and its value on the boundary points coincides. Therefore, it suffices to prove that in each range, $\surv{L(u)}{a}$ is decreasing. In Case 1, the function is constant and thus the result holds trivially. In Case 2, since $C_t \le_{hr} A_t$ according to Lemma \ref{lem:assgen}, the function is decreasing using Lemma \ref{lem:hr_eq}. In Case 3, Lemma \ref{lem:assgen} and Lemma \ref{lem:ass2_a} imply that $\frac{\surv{C_t}{u}}{\surv{A_t}{u}}$ is log-convex in $u$. Using Lemma \ref{lem:log-conv}, we have that $\surv{L(u)}{a}$ is decreasing in $u$.\\


\noindent $\rightarrow$ Induction step: Assume $L(t,n-1,u_2) \le_{st} L(t,n-1,u_1)$, for all $u_1 \le u_2$. We prove that if $u_1 \le u_2$, then $L(t,n,u_2) \le_{st} L(t,n,u_1)$.\\

Based on the fact that stochastic order is a partial order, we can instead prove that $L(u_2) \le_{st} L'$ and $L' \le_{st} L(u_1)$, in which $L'$ is the lifetime under a hypothetical situation similar to $L(u_1)$ with the difference that the decision to use the subsequent AVF is delayed until $u_2+k$ (See Figure \ref{fig:prop1-induction}).
\begin{figure}[htbp]
\centering
\includegraphics[scale=0.70]{./files/LT-F2.pdf}

\caption{Induction step and the hypothetical random variable $L'$}
\label{fig:prop1-induction}
\end{figure}
\begin{itemize}
	\item $L(u_2) \le_{st} L'$: For $x \le u_2+k$, we have that $\surv{L(u_2)}{x}=\surv{L(t,1,u_2)}{x}$, and $\surv{L'}{x}=\surv{L(t,1,u_1)}{x}$. Thus the result follows from induction base. Otherwise, we have 
	\begin{align*}
		\surv{L(u_2)}{x}=\surv{L(u_2)}{u_2+k}.& \surv{L(u_2+k,n-1)}{x-[u_2+k]}, \\
		\surv{L'}{x}=\surv{L'}{u_2+k}.& \surv{L(u_2+k,n-1)}{x-[u_2+k]}.
	\end{align*}

	Based on the previous result, we have $\surv{L(u_2)}{u_2+k} \le \surv{L'}{u_2+k}$, and thus we get the result.
	\item $L' \le_{st} L(t,n,u_1)$. For $x \le u_1+k$, we have that $\surv{L(u_1)}{x}=\surv{L'}{x}=\surv{L(t,1,u_1)}{x}$. For $x  \ge u_1+k$, 
		\begin{align*}
			\surv{L(u_1)}{x}=\surv{L'}{u_1+k}.& \surv{L(u_1+k,n-1,0)}{x-[u_1+k]}, \\
			\surv{L'}{x}=\surv{L'}{u_1+k}.& \surv{L(u_1+k,n-1,u_2-u_1)}{x-[u_1+k]}.
		\end{align*}
		using the induction hypothesis, $L(u_1+k,n-1,u_2-u_1) \le_{st} L(u_1+k,n-1,0)$, we get the desired result.
\end{itemize}
\end{proof}

Now we extend the result of Proposition \ref{prop:total} from AVF use time to AVF referral time.\\

\begin{reptheorem}{thm:total}
Under Assumptions \ref{ass:dec}-\ref{ass:AVFs}, delaying AVF referral stochastically decreases a patient's lifetime.
\end{reptheorem}

\begin{proof}
~\\
In Proposition \ref{prop:total}, we proved that the patient's residual lifetime stochastically decreases in AVF use time. Since later referral means later AVF use time, then the patient's lifetime is stochastically decreasing in the referral time, as well. Mathematically, we can prove it as follows: let $r$ be the referral time, and $L_r$ the lifetime of the patient when the patient is referred at $r$ for the current AVF, and optimally (with respect to survival function) for the subsequent chances. Note that for AVF use time we have $u=r+W+M$. Fix $W=w,M=m$, arbitrarily. If $r_1\le r_2$, then $u_1 \le u_2$. As a result of Proposition \ref{prop:total}, 
$$\forall w,m:[L_{r_2}\big| W=w, M=m] \le_{st} [L_{r_1}\big|W=w, M=m].$$ 
Now, by closure of stochastic order under mixture (Lemma \ref{lem:pres}), this implies $L_{r_2} \le_{st} L_{r_1}$.
\end{proof}

\subsection{QALE -- A Generic Patient}
In this section, we provide the proofs for Section \ref{sec:QALEGP}.

We restrict ourselves to threshold policies that are not dominated by $\pi_0$ when applied at any decision state $(t,n)$. Let $T$ be the set of such thresholds, i.e. $$T=\{\tau: v^{\pi(\tau)}(t,n) \ge {v^{\pi_0}(t,n)} : \forall t,n  \}.$$
Note that $T$ is non-empty since $0 \in T$. We will show that there exists $ \tau^* \in T$ such that $\pi(\tau^*)$ is optimal. In order to prove this, we first show that for all $\tau \in T$, the difference of the performance of the threshold policy $\pi(\tau)$ and $\pi_0$ decreases in $t$. We prove some preliminary results before proving this claim in Proposition \ref{prop:thresh_dec}.

\begin{lem} \label{lem:dec_v}
$ \diff{v(t,1,0|M_1=m,K_1=k)}{k}$ is non-negative and decreasing in $t$.
\end{lem}
\begin{proof}
~\\
Let $w(t,m,k):=v(t,1,0|M_1=m,K_1=k)$. We can calculate $w(t,m,k)$ as follows:
\begin{align} \label{eq:decind} \nonumber
w(t,m,k)=-d+ q_c\int_{0}^{m} x\pdf{C_t}{x}dx\\
 +\surv{C_t}{m}\bigg [q_cm+ & q_a\int_{0}^{k} x \pdf{A_{t+m}}{x} dx+\surv{A_{t+m}}{k} \big[q_ak+q_c\Ex C_{t+m+k} \big ] \bigg].
\end{align}
We can establish differentiability of $w$ as follows. Note that $\forall x\ge 0$,  $\surv{A_{x}}{k}$ and $\surv{C_{x}}{k}$ are differentiable in $k$ by Assumption \ref{ass:surv}. Differentiability of the latter itself implies differentiability of  $\Ex C_{x+k}$ by Lemma \ref{lem:res'}. These two imply that $w(t,m,k)$ is differentiable in $k$.\\

Using Lemma \ref{lem:res'}, we have
\begin{align} \label{eq:w'}
 \nonumber \diff {w(t,m,k)}{k}=& \surv{C_t}{m} \bigg [ \diff{q_a\int_{0}^{k} x \pdf{A_{t+m}}{x} dx}{k} + \surv{A_{t+m}}{k} \diff{\big[q_ak+q_c\Ex C_{t+m+k} \big ]}{k} +  \big [\diff{\surv{A_{t+m}}{k}}{k}\big ]\big[q_ak+q_c\Ex C_{t+m+k} \big ] \bigg ]\\ \nonumber
=& \surv{C_t}{m} \bigg [ q_ak\pdf{A_{t+m}}{k}+\surv{A_{t+m}}{k} \big\{ q_a+q_c \big [\hrate{C_{t+m}}{k} \Ex C_{t+m+k}-1]\big\} 
-\pdf{A_{t+m}}{k}  [q_ak+q_c \Ex C_{t+m+k} ]  \bigg] \\
=& \surv{C_t}{m} \surv{A_{t+m}}{k} \bigg[q_a-q_c+q_c \Ex C_{t+m+k} \big [\hrate{C_{t}}{m+k}-\hrate{A_t}{m+k}\big] \bigg]
\end{align}

We can prove that $\diff {w(t,m,k)}{k}$ is decreasing in $t$, and non-negative by showing that it is product of the following three non-negative decreasing functions:

\begin{enumerate}
\item $\surv{C_t}{m}$: This is decreasing in $t$, since $C_t$ is stochastically decreasing in $t$ based on Assumption \ref{ass:IFR} and Lemma \ref{lem:IFR}.
\item $\surv{A_{t+m}}{k}$: This is decreasing in $t$, since $A_{t+m}$ is stochastically decreasing in $t$ based on Assumption \ref{ass:IFR} and Lemma \ref{lem:IFR}.
\item $q_a-q_c+q_c \Ex C_{t+m+k} \big [\hrate{C_{t}}{m+k}-\hrate{A_t}{m+k}\big]$:
\begin{itemize}
\item non-negative: We have that $q_a \ge q_c$. Also, $\hrate{C_{t}}{m+k} \ge \hrate{A_t}{m+k}$ based on Lemma \ref{lem:assgen}.
\item decreasing: $\Ex C_{t+m+k}$ is decreasing in $t$, because $C_{t+m+k}$ is stochastically decreasing in $t$. Also, $\hrate{C_{t}}{m+k}-\hrate{A_t}{m+k}$ is decreasing in $t$ based on Lemma \ref{lem:assgen}.
\end{itemize} 
\end{enumerate}
\end{proof}
The following intermediate results will provide a foundation for validity of the binary search for the optimal threshold in later results:
\begin{prop} \label{prop:v1}
$ v(t,1,0)-v^{\pi_0}(t,1)$ is decreasing in $t$.
\end{prop}
\begin{proof}
Choose $t_1 \le t_2$ arbitrarily. We have that $\forall m:\diff {[w(t_2,m,k)-w(t_1,m,k)]}{k} \le 0$ by the linearity of the differential operator, and Lemma \ref{lem:dec_v}. This implies that $$\forall k,m: w(t_2,m,k)-w(t_1,m,k) \le w(t_2,m,0)-w(t_1,m,0).$$
But, $\forall m:w(t,m,0)=-d+v^{\pi_0}(t,1) $. Thus, 
$$\forall k,m:  w(t_2,m,k)-w(t_1,m,k) \le v^{\pi_0}(t_2,1)-v^{\pi_0}(t_1,1).$$
Since $v(t,1,0) = \Ex_{M_1,K_1} w(t,m,k)$ by Assumption \ref{ass:AVFs}, taking expectation from both sides with respect to $M_1,K_1$ gives us:
$$v(t_2,1,0)-v(t_1,1,0) \le v^{\pi_0}(t_2,1)-v^{\pi_0}(t_1,1).$$
\end{proof}
Note that  if any of Assumptions \ref{ass:relative}-\ref{ass:IFR} hold strictly, we have that the monotonicity is strict for Lemma \ref{lem:dec_v}, and Propositions \ref{prop:v1} and \ref{prop:thresh_dec}.

Now, we prove our claim:
\begin{prop} \label{prop:thresh_dec} For all $\tau \in T$, we have $\forall n: v^{\pi(\tau)}(t,n)- {v^{\pi_0}(t,.)} \downarrow t$.
\end{prop} 
\begin{proof}
~\\ Note that since $M_i,K_i$ are independent of the survival process and the policy in use (based on Assumption \ref{ass:AVFs}), we have that 
\begin{align*}
v^{\pi}(t,n)=\Ex \big [ v^{\pi}(t,n \big |M_1,\ldots, M_n,K_1,\ldots, K_n)\big].
\end{align*}
We prove the result by induction on $n$ as follows:
\begin{itemize}
\item $n=1$: For $v^{\pi(\tau)}(t,1)$ we have:
\begin{align*}
v^{\pi(\tau)}(t,1)- v^{\pi_0}(t,1)=
\begin{cases}
v(t,1,0)-v^{\pi_0}(t,1)& t \le \tau \\
0& o.w.
\end{cases}
\end{align*}

The function is decreasing for $t \le \tau$ by Proposition \ref{prop:v1}, and for $t > \tau$ trivially. To rule out an upward jump at $t=\tau$, it suffices to have that $v^{\pi(\tau)}(\tau,1) - v^{\pi_0}(\tau,1) \ge 0$, which holds by the fact that $\tau \in T$.
\item Assume the result holds for $n=1,\ldots, k$. We prove that it holds for $n=k+1$.

To that end, we first prove the following:
\begin{align} \label{eq:relative}
\forall n>1: v^{\pi(\tau)}(t,n) - v^{\pi(\tau)}(t,n-1) \downarrow {t}.
\end{align}
Since $v^{\pi(\tau)}(t,k+1)- {v^{\pi_0}(t,.)}=[v^{\pi(\tau)}(t,k+1)- v^{\pi(\tau)}(t,k)]+[v^{\pi(\tau)}(t,k)- {v^{\pi_0}(t,.)}]$, the result will then follow from the fact that the sum of two decreasing functions is decreasing.\\
Now we show Equation \ref{eq:relative} as follows:\\
We first fix $M_i=m_i$, and $K_i=k_i$ for $i=2,\ldots,n$ arbitrarily.  The result generalizes using the preservation of monotonicity under expectation.\\
Let $t':=\sum_{i=2}^{n} [m_i+k_i]$, and based on whether $ t+t' \le \tau$, consider two cases:
\begin{itemize}
\item $t+t' \le \tau$: In this case the last referral happens at time $t+t'$, provided that the patient survives until that time. The difference between the cases of $n$ and $n-1$ remaining AVF chances is the (possible) use of one AVF chance. Let $S(t',t)$ represent the probability that a patient alive at time $t$ survives until time $t+t'$. Then, we have the following:
\begin{align} \label{eq:deltav}
[ v^{\pi(\tau)}(t,n)- v^{\pi(\tau)}(t,n-1)]\big | M_{2,\ldots,n},K_{2,\ldots,n} =S(t',t) \big[v^{\pi(\tau)}(t+t',1)-v^{\pi_0}(t+t',1)\big].
\end{align}
\item Otherwise; $\exists j: t+\sum_{i=j+1}^{n} [m_i+k_i] > \tau$. Based on $\pi(\tau)$, we don't refer for the remaining AVF chances after $\tau$, and thus we have  
$v^{\pi(\tau)}(t,n|M_{2,\ldots,n},K_{2,\ldots,n})=v^{\pi(\tau)}(t,n-l|M_{2,\ldots,n},K_{2,\ldots,n})$ for all $l \le j$. \\
\end{itemize}
Thus, we have
\begin{align}\label{eq:deltavn}
[ v^{\pi(\tau)}(t,n)- v^{\pi(\tau)}(t,n-1)]\big | M_{2,\ldots,n},K_{2,\ldots,n}=
\begin{cases}
S(t',t) \big[v^{\pi(\tau)}(t+t',1)-v^{\pi_0}(t+t',1)\big]&:  t +t' \le  \tau \\
0 &: o.w.
\end{cases}
\end{align}

It suffices to show that $S(t',t) \big[v^{\pi(\tau)}(t+t',1)-v^{\pi_0}(t+t',1)\big] $ is decreasing in $t$ and non-negative. We prove that by showing that it is the product of the following two non-negative and decreasing functions:
\begin{itemize} 
\item $S(t',t)$: The probability is non-negative by definition. First we compute $S(t',t)$ as follows:
\begin{align*}
S(t',t)=&\pr [C_t>m_n , A_{t+m_n}>k_n,\ldots ,A_{t+t'-k_2}>k_2]\\
=&\pr[C_t>m_n] \pr[A_{t+m_n}>k_n|C_t>m_n]\ldots  \pr [A_{t+t'-k_2}>k_2|C_t>m_n ,\ldots]\\
&\xlongequal{A(\ref{ass:surv})}\surv{C_t}{m_n}\surv{A_{t+m_n}}{k_n}\ldots \surv {A_{t+t'-k_2}}{k_2}
\end{align*}
Each of the survival probabilities is decreasing in $t$ because $A_{t+x}$ and $C_{t+x}$ are stochastically decreasing in $t$, for any $x\ge 0$ by Assumption \ref{ass:IFR}.
\item $v^{\pi(\tau)}(t+t',1)-v^{\pi_0}(t+t',1)$: This term is non-negative since $\tau \in T$. Also, this term is decreasing in $t$ using the result of the case $n=1$.
\end{itemize}
\end{itemize}
\end{proof}

The following corollary is used in extending the optimality of the policy found for $n=1$ to $n>1$.
\begin{cor} \label{cor:NWN}
If $\forall t: v^{\pi(\tau)}(t,1) \ge v^{\pi_0}(t,1)$, then  $v^{\pi(\tau)}(t,n)$ is increasing in $n$. As a result  $\tau \in T$.
\end{cor}
\begin{proof}
~\\

Since $\forall t: v^{\pi(\tau)}(t,1) \ge v^{\pi_0}(t,1)$ by assumption, from Equation \ref{eq:deltavn}, we have $$[ v^{\pi(\tau)}(t,n)- v^{\pi(\tau)}(t,n-1)]\big | M_{2,\ldots,n},K_{2,\ldots,n}  \ge 0.$$ Taking expectation with respect to $ M_{2,\ldots,n},K_{2,\ldots,n}$, we get:
\begin{align*} 
 v^{\pi(\tau)}(t,n) \ge v^{\pi(\tau)}(t,n-1)\ge \ldots \ge v^{\pi(\tau)}(t,1) \ge v^{\pi_0}(t,1).
\end{align*}
\end{proof}
Before proving the main results, we prove the following preliminary result.
\begin{lem} \label{lem:delta}
The following equality holds for $v(t,n,y)$.
\begin{align*} 
v(t,n,y)&=\surv{C_t}{y}\bigg[ v(t+y,n,0)-v^{\pi_0}(t+y,n)\bigg]+v^{\pi_0}(t,n)
\end{align*}
\end{lem}

\begin{proof}
 ~\\We can calculate $v(t,n,y)$ as follows:
\begin{align*}
v(t,n,y)&=q_c\int_{0}^{y} x\pdf{C_t}{x} dx +\surv{C_t}{y} \big [q_c y+v(t+y,n,0) \big ]
\end{align*}
After rearranging terms, we get:
\begin{align*}
v(t,n,y)&=\surv{C_t}{y}\big[ v(t+y,n,0)-v^{\pi_0}(t+y,n)\big]+q_c\int_{0}^{y} x\pdf{C_t}{x} dx+\surv{C_t}{y} \big [q_c y+v^{\pi_0}(t+y,n) \big]\\
	&=\surv{C_t}{y}\big[ v(t+y,n,0)-v^{\pi_0}(t+y,n)\big]+q_c\left[ \int_{0}^{y} x\pdf{C_t}{x} dx+\surv{C_t}{y} [ y+\Ex C_{t+y} ]\right]\\
	&=\surv{C_t}{y}\big[ v(t+y,n,0)-v^{\pi_0}(t+y,n)\big]+q_c\Ex C_{t}\\
	&=\surv{C_t}{y}\big[ v(t+y,n,0)-v^{\pi_0}(t+y,n)\big]+v^{\pi_0}(t,n).
\end{align*}
\begin{figure}[htbp]
\centering
\includegraphics[scale=0.6]{./files/QALE-F1.pdf}
\caption{$v(t,n,y)$ versus $v^{\pi_0}(t,n)$ }
\label{fig:lem}
\end{figure}
Consider Figure \ref{fig:lem}. Another way of proving this equality is by calculating $v(t,n,y)- v^{\pi_0}(t,n)$ as follows:
\begin{align*} 
v(t,n,y)- v^{\pi_0}(t,n)&=\bigg[\big(v(t,n,y)- v^{\pi_0}(t,n)\big) \big | C_t \le y \bigg]\pr [C_t \le y]+\bigg[\big(v(t,n,y)- v^{\pi_0}(t,n)\big) \big | C_t > y \bigg]\pr [C_t > y]\\
&=\pr [C_t \le y](0)+\pr [C_t > y]\big[v(t+y,n,0)- v^{\pi_0}(t+y,n)\big]\\
&=\surv{C_t}{y}\big[ v(t+y,n,0)-v^{\pi_0}(t+y,n)\big].
\end{align*}
\end{proof}
\noat{I guess we can remove the first proof.\\}
The following theorem proves the optimality of threshold policies for the case we have one AVF opportunity ($n=1$), and also establishes a base for finding the optimal threshold.
\begin{reptheorem}{thm:qalen=1}[Existence of a referral threshold for $n=1$]
Assume $n=1$. There is an age threshold $\tau^*$ such that the policy $\pi(\tau^*)$ maximizes the expected QALE of the patient. In other words, for $t < \tau^*$, referral at $t$ is the optimal action, otherwise the no-referral action is  optimal.
\end{reptheorem}
\begin{proof}
~\\
Fix $t$, and recall that $n=1$.  Assume that we take action $@_y$. By Lemma \ref{lem:delta}, we have:
\begin{align*}
v(t,1,y)=\surv{C_t}{y}\big[ v(t+y,1,0)-v^{\pi_0}(t+y,1)\big]+v^{\pi_0}(t,1)
\end{align*}
For $@_y$ to be an optimal action candidate, it is necessary that referral at $t+y$ is no worse than the no-referral action, i.e. $v(t+y,1,0) \ge v^{\pi_0}(t+y,1)$.\\
Since $v(t+y,1,0) - v^{\pi_0}(t+y,1)$ is decreasing in $y$ by Proposition \ref{prop:v1}, and $\surv{C_t}{y}$ is decreasing in $y$, then $v(t,1,y)$ is decreasing in $y$ for all $y$ that satisfy the necessary condition. Thus, the optimal action is to refer at $t$ if $v(t,1,0) \ge v^{\pi_0}(t,1)$, and no-referral, if otherwise.

Now, we form the policy $\pi(\tau^*)$ as follows based on whether $v(0,1,0) < v^{\pi_0}(0,1)$ or not.
\begin{itemize}
\item $v(0,1,0) < v^{\pi_0}(0,1)$: we have that $\forall t:v(t,1,0) < v^{\pi_0}(t,1)$, since $v(t,1,0) - v^{\pi_0}(t,1)$ is decreasing in $t$ by Proposition \ref{prop:v1}. As a result, the no-referral action is optimal for all $t$. Choose  $\tau^*=0$ in this case.
\item $v(0,1,0) \ge v^{\pi_0}(0,1)$: we have that $\exists t'\le \infty$ such that for $t \le t'$, we have $v(t,1,0) \ge v^{\pi_0}(t,1)$, and $v(t,1,0) < v^{\pi_0}(t,1)$ otherwise. For $t \le t'$, referral at $t$ is optimal, and the no-referral action is optimal, if otherwise. Choose $\tau^*=t'$ in this case.
\end{itemize}
The policy $\pi(\tau^*)$ is optimal for $n=1$ by construction.
\end{proof}

\begin{repcorollary}{cor:binsearch}[Binary search] The optimal policy can be found using a binary search for $\tau^*$ over $[0,\infty)$.
\end{repcorollary}
\begin{proof} See how the optimal policy is formed in Theorem \ref{thm:qalen=1}.
\end{proof}
In the next theorem, we see that the optimal threshold found in Theorem \ref{thm:qalen=1} for $n=1$ is also optimal for $n > 1$.
\begin{reptheorem}{thm:QALE} [Optimality of threshold policies]
The policy $\pi(\tau^*)$ (constructed in Theorem \ref{thm:qalen=1}) is optimal for all $n \ge 1$.
\end{reptheorem}

\begin{proof}
~\\
We prove the theorem by induction on $n$:

\begin{itemize}
\item $n=1$: The policy $\pi(\tau^*)$ is optimal for $n=1$ by construction.
\item Assume for $n=1,\ldots, k$ the threshold policy $\pi(\tau^*)$ is optimal. We prove that, it is optimal for $n=k+1$ as well.\\
We prove the optimality of the policy $\pi(\tau^*)$ as follows:
\begin{itemize}
\item $t >\tau^*$: The policy suggests the no-referral action. We argue that it is optimal as follows.\\
Note that based on the optimality of $\pi(\tau^*)$ for $n \le k$, the last $k$ AVF chances won't be used, because their referral would happen at some $t' \ge t > \tau^*$.
Thus, we are left with one AVF chance. Similarly, we should not use that chance either. Thus, the no-referral action is optimal in this case.

\item  $t \le \tau^*$: The policy suggests referral at $t$. We argue that it is optimal as follows.\\
Note that no referral can be made later than $\tau^*$ (using the logic explained in the first case). Thus, we restrict our attention to $y \le \tau^*-t$. For all such $y$, we have that $v(t+y,n,0)= v^{\pi(\tau ^*)}(t+y,n)$, based on the induction assumption. By Lemma \ref{lem:delta}, we have
$$v(t,n,y)=\surv{C_t}{y}\bigg[v^{\pi(\tau ^*)}(t+y,n)-v^{\pi_0}(t+y,n)\bigg]+v^{\pi_0}(t,n).$$ 


 We conclude the proof by showing $v(t,n,y)$ is decreasing in $y$. Since $\surv{C_t}{y}$ is decreasing in $y$, it suffices to show that $v^{\pi(\tau ^*)}(t+y,n)-v^{\pi_0}(t+y,n)$ is non-negative and decreasing in $y$.

Since $\pi(\tau^*)$ is optimal for $n=1$, then $\forall t: v^{\pi(\tau^*)}(t,1) \ge v^{\pi_0}(t,1)$. As a result of Corollary \ref{cor:NWN}, we have $\tau^* \in T$. This implies  $$v^{\pi(\tau ^*)}(t+y,n) \ge v^{\pi_0}(t+y,n).$$ 

Since $\tau^* \in T$, using Proposition \ref{prop:thresh_dec}, we have that $v^{\pi(\tau ^*)}(t+y,n)- v^{\pi_0}(t+y,n)\downarrow y$.
\end{itemize}
\end{itemize}
\end{proof}

\subsection{Patient-Specific Considerations}
In this section, we provide the proofs of Section \ref{sec:psqale}.

We prove a preliminary result first:
\begin{thm}\label{thm:dec_d}
The critical age, $\tau^*(d)$, is decreasing in $d$.
\end{thm}
\begin{proof}
~
\\ Choose $0 \le d_1 \le d_2$ arbitrarily. By Theorem \ref{thm:QALE}, there exist $\tau^*_1$ and $\tau^*_2$ for which threshold policies $\pi(\tau^*_1)$ and $\pi(\tau^*_2)$ are optimal for cases $d=d_1$, and $d=d_2$, respectively.
Based on the proof of Theorem \ref{thm:qalen=1}, 
\begin{align} \label{eq:IFFTHLD}
t' \ge \tau^*(d) \iff v(t',1,0 ;d) < v^{\pi_0}(t',1)
\end{align}
As a result, $v(\tau^*_1,1,0 ;d_1) < v^{\pi_0}(\tau^*_1,1)$. But, $\forall t': v(t',1,0 ;d) - v^{\pi_0}(t',1)$ is (linearly) decreasing in $d$ (see Equation \ref{eq:decind}). Thus, $v(\tau^*_1,1,0 ;d_2) < v^{\pi_0}(\tau^*_1,1)$. By Equation \ref{eq:IFFTHLD}, we have $ \tau^*_2 \le \tau^*_1$.
\end{proof}


\begin{reptheorem}{thm:cdis} At any age, there exists a critical AVF creation disutility denoted by $d^{\text{cr}}(t)$, such that the optimal decision at time $t$ is to do AVF surgery immediately if patient's AVF creation disutility is not higher than the critical disutility (i.e. if $d \le d^{\text{cr}}(t)$), and is to use CVC for the rest of patient's life, otherwise.
\end{reptheorem}
\begin{proof}
~\\Fix $t$. Define $\Delta Q(t)$ as the benefit of an immediate AVF surgery vs using CVC for the rest of the patient's lifetime before subtracting the AVF creation disutility. In other words, $\Delta Q(t)=d+v(t,1,0) -v^{\pi_0}(t,1)$. By Theorem \ref{thm:d=0}, $\Delta Q(t) \ge 0$. By Proposition \ref{prop:thresh_dec}, we have that $\Delta Q(t)$ is decreasing in $t$. In Theorem \ref{thm:compdcrt}, we prove that $\Delta Q(t)$ is decreasing in $M$ and increasing in $K$. Thus, $\Delta Q(t) \le \Ex [A_t - C_t]$, in which the upper bound is achieved when $K=\infty$, and $M=0$ with probability one. Therefore, $\Delta Q(t)$ is finite. By Equation \ref{eq:IFFTHLD}, $t < \tau^*(d)$, and immediate surgery is optimal, if and only if $\Delta Q(t) \ge d$. Since $\Delta Q(t)$ is decreasing in $t$, we have $d^{\text{cr}}(t)=\Delta Q(t)$. 
\end{proof}
\begin{repcorollary}{cor:dcrt}
Critical disutility, $d^{\text{cr}}(t)$, is decreasing in $t$. Furthermore, if any of Assumptions \ref{ass:relative}-\ref{ass:IFR} hold strictly, we have that $d^{\text{cr}}(t)$ is strictly decreasing in $t$.
\end{repcorollary}
\begin{proof}
The result immediately follows from the definition of $\Delta Q(t)$ and Proposition \ref{prop:thresh_dec}.
\end{proof}

\begin{reptheorem}{thm:compdcrt}
Consider two patient types, indexed by 1 and 2, whose characteristics satisfy the following:
\begin{enumerate}
\item $M^{(2)} \le_{st} M^{(1)}$
\item $K^{(1)} \le_{st} K^{(2)}$
\item $A^{(1)} \le_{hr} A^{(2)}$, and $C^{(1)} \le_{hr} C^{(2)}$
\item $[\hrate{C^{(1)}}{t}- \hrate{A^{(1)}}{t}] \le [\hrate{C^{(2)}}{t} -\hrate{A^{(2)}}{t}]: \forall t$
\item $q_c^{(1)} \le q_c^{(2)}$, and $q_a^{(1)} - q_c^{(1)} \le q_a^{(2) }- q_c^{(2)}$
\end{enumerate}
where $(i)$ denotes the index. Then, $d^{\text{cr}}_{(1)}(t) \le d^{\text{cr}}_{(2)}(t): \forall t$.
\end{reptheorem}
\begin{proof}
~\\ Recall from Lemma \ref{lem:dec_v},  $v(t,1,0|M_1=m,K_1=k)$ is increasing in $k$. As a result, $\Delta Q(t|M=m, K=k)$ is increasing in $k$. Below, we show that it is decreasing in $m$. Similar to the proof of Lemma \ref{lem:dec_v}, we can show that $\diff{v(t,1,0|M_1=m,K_1=k)}{k}$ is decreasing in $m$. By following the proof of Proposition \ref{prop:v1}, we have that
$$\forall k,t \text{ and } m_2 \le m_1:  w(t,m_1,k)-w(t,m_2,k) \le v^{\pi_0}(t,1)-v^{\pi_0}(t,1)=0.$$
and hence $\Delta Q(t|M=m, K=k)$ is decreasing in $m$.


Using Equation \ref{eq:w'} and the assumptions 3-5 of the Theorem, we have that $\diff {w_{(1)}(t,m,k)}{k} \le \diff {w_{(2)}(t,m,k)}{k}$. By following the proof of Proposition \ref{prop:v1}, we can show that
$$\forall k,t:  w_{(1)}(t,m,k)-w_{(2)}(t,m,k) \le v_{(1)}^{\pi_0}(t,1)-v_{(2)}^{\pi_0}(t,1).$$
and hence
$$\Delta Q_{(1)}(t|M=m, K=k) \le \Delta Q_{(2)}(t|M=m, K=k).$$
Using the fact that $\Delta Q(t|M=m, K=k)$ is increasing in $k$ and decreasing in $m$, and assumptions 1 and 2 of the Theorem, we have $\Delta Q_{(1)}(t) \le \Delta Q_{(2)}(t)$ and thus the result.
\end{proof}
\subsection{A Special Case}
\begin{reptheorem}{thm:exp}
Assume that $A \sim \exp(a)$, and $C \sim \exp(c)$, where $0<a \le c$. Then,
\begin{align}\label{eq:comp2}
d^{\text{cr}}(t) =d^{\text{cr}}:= p\bigg[\frac{q_a}{a}-\frac{q_c}{c} \bigg] \bigg[\Ex_M\big[ e^{-cM}\big]\bigg ] \bigg[1-\Ex_Z\big[ e^{-aZ}\big] \bigg].
\end{align}
where $p$ is the success probability of the AVF creation process, and $Z$ is the lifetime of a functional AVF. Furthermore, if $M\sim \exp(m)$, and $K\sim \exp(k)$, we have:
\begin{align*}
d^{\text{cr}} = p .\bigg[\frac{q_a}{a}-\frac{q_c}{c}\bigg] .  \frac{1}{1+ \frac{c}{m}} . \frac{1}{1+ \frac{k}{a}} 
\end{align*}
\end{reptheorem}
\begin{proof}
~\\Using equation \ref{eq:w'}, we have:
\begin{align*}
\diff {w(t,m,k)}{k}=e^{-cm}.e^{-ak}\big[q_a-q_c+q_c\frac{1}{c}(c-a)\big]=e^{-cm}.[ae^{-ak}]\big[\frac{q_a}{a}-\frac{q_c}{c}\big]
\end{align*}
Recall that $\forall m:w(t,m,0)=-d+v^{\pi_0}(t,1)$. Therefore, by taking integration with respect to $k$:
\begin{align*}
\Delta Q(t \big |M=m, K=k)=d+w(t,m,k)-v^{\pi_0}(t,1)=\big[\frac{q_a}{a}-\frac{q_c}{c} \big] e^{-cm}\big[1-e^{-ak}\big].
\end{align*}
Taking expectation with respect to $M$, and $K$ proves Equation \ref{eq:comp2}.

Now consider the case that $M$ and $K$ follow exponential distributions. Using the moment generating function of an exponential distribution $X \sim \exp(\lambda)$, we have
\begin{align}\label{eq:MGexp}
\Ex_X[e^{-tX}]=(1+\frac{t}{\lambda})^{-1}
\end{align}
Combining Equations \ref{eq:MGexp} and \ref{eq:comp2}, we get the desired result.
\end{proof}



\newpage
\bibliography{refs}
\newpage
\end{document}










